## Regression using average skin color of friends as dependent variable


ind_controls <- paste("male", "factor(religion_simple)", 
                      "age_grade_distortion", "score_self_esteem", 
                      "score_parents_support", "score_study", sep = "+")

ses_controls <- paste("scores_poverty", "score_neighborhood_quality", 
                      "factor(occ_father_simple_no_job_no_na)",
                      "factor(occ_mother_simple_no_job_no_na)", sep = "+")

class_fe <- paste("factor(class_id)")


regressions_1 <- function(dep_var, race_var, grade_var) {
  # dep_var = "ssi_same_negro_na_2_std"
  # race_var = "factor(negro_na)"
  # grade_var = "grades_score"
  
  # basic regression
  basic_reg <- paste0(dep_var, "~", paste0(race_var,"*", grade_var))
  
  Form_basic <- formula(basic_reg)
  
  # Individual control
  Form_ind <- formula(paste(basic_reg, ind_controls, sep = "+"))
  
  # SES control
  Form_ses <- formula(paste(basic_reg, ses_controls, sep = "+"))
  
  # Individual + SES control
  Form_ind_ses <- formula(paste(basic_reg, ind_controls, 
                                ses_controls, sep = "+"))
  
  # Individual controls + Classroom FE
  Form_ind_class_fe <- formula(paste(basic_reg, ind_controls, 
                                     class_fe, sep = "+"))
  
  # SES controls + Classroom FE
  Form_ses_class_fe <- formula(paste(basic_reg, ses_controls, 
                                     class_fe, sep = "+"))
  
  # Ind. + SES controls + Classroom FE
  Form_ind_ses_class_fe <- formula(paste(basic_reg, ind_controls,
                                         ses_controls, class_fe, 
                                         sep = "+"))
  
  list_specifications <- c(Form_basic, Form_ind, Form_ses, Form_ind_ses,
                           Form_ind_class_fe, Form_ses_class_fe, 
                           Form_ind_ses_class_fe) 
  
  results <- lapply(list_specifications, function(x) (lm(x, data = data, subset = (race < 4))))
  results_r <- lapply(results, robust_se, "HC3")
  results_c <- lapply(results, cluster_se, "class_id")
  
  results_final <- list(results, results_r, results_c)  
  return(results_final)
}

regressions_2_sq <- function(dep_var, race_var, grade_var) {
  # dep_var = "ssi_same_negro_na_2_std"
  # race_var = "factor(race_simple)"
  # grade_var = "grades_score"
  # supply_var = "supply_2_sd_same_race_fr_2"
  
  # basic regression
  basic_reg <- paste0(dep_var, "~", paste0(race_var,"*", grade_var))
  sq_inte <- paste0(race_var,"*", "grades_score_square")
  cb_inte <- paste0(race_var,"*", "grades_score_cubic")
  
  Form_basic <- formula(paste(basic_reg, sq_inte, sep = "+"))
  
  # Individual control
  Form_ind <- formula(paste(basic_reg, sq_inte, ind_controls, sep = "+"))
  
  # SES control
  Form_ses <- formula(paste(basic_reg, sq_inte, ses_controls, sep = "+"))
  
  # Individual + SES control
  Form_ind_ses <- formula(paste(basic_reg, sq_inte, ind_controls, 
                                ses_controls, sep = "+"))
  
  # Individual controls + Classroom FE
  Form_ind_class_fe <- formula(paste(basic_reg, sq_inte, ind_controls, 
                                     class_fe, sep = "+"))
  
  # SES controls + Classroom FE
  Form_ses_class_fe <- formula(paste(basic_reg, sq_inte, ses_controls, 
                                     class_fe, sep = "+"))
  
  # Ind. + SES controls + Classroom FE
  Form_ind_ses_class_fe <- formula(paste(basic_reg, sq_inte, ind_controls,
                                         ses_controls, class_fe, 
                                         sep = "+"))
  
  list_specifications <- c(Form_basic, Form_ind, Form_ses, Form_ind_ses,
                           Form_ind_class_fe, Form_ses_class_fe, 
                           Form_ind_ses_class_fe) 
  
  results <- lapply(list_specifications, function(x) (lm(x, data = data, subset = (race < 4))))
  results_r <- lapply(results, robust_se, "HC3")
  results_c <- lapply(results, cluster_se, "class_id")
  
  results_final <- list(results, results_r, results_c)  
  
  return(results_final)
}

result_avg_skin_fr_2_bi <- regressions_1(dep_var = "avg_skin_fr_2",
                                       race_var = "negro_na", 
                                       grade_var = "grades_score")

result_sh_wh_fr_2_bi <- regressions_1(dep_var = "sh_wh_fr_2",
                                    race_var = "negro_na", 
                                    grade_var = "grades_score")

result_white_friends_2_bi <- regressions_1(dep_var = "white_friends_2",
                                         race_var = "negro_na", 
                                         grade_var = "grades_score")

result_nonwhite_friends_2_bi <- regressions_1(dep_var = "nonwhite_friends_2",
                                            race_var = "negro_na", 
                                            grade_var = "grades_score")


variables_to_omit <- "(age)|(male)|(religion)|(class_id)|(avg_)|
                      |(class)|(school)|(missing)|(EM)|(occ_)|
                      |(poverty)|(Constant)|(supply)|(occ_pai_)|(n_)|
                      |(score_neighborhood_quality)|(score_study)|
                      |(score_parents_support)|(score_self_esteem)"


list_regs <- list(result_white_friends_2_bi[[1]][[7]], 
                    result_nonwhite_friends_2_bi[[1]][[7]], 
                    result_avg_skin_fr_2_bi[[1]][[7]], 
                    result_sh_wh_fr_2_bi[[1]][[7]])

list_regs_c <- list(result_white_friends_2_bi[[3]][[7]], 
                  result_nonwhite_friends_2_bi[[3]][[7]], 
                  result_avg_skin_fr_2_bi[[3]][[7]], 
                  result_sh_wh_fr_2_bi[[3]][[7]])

n_obs <- sapply(list_regs, get_obs)

r_adj <- sapply(list_regs, get_adj_r)

testing <- sapply(list_regs, 
                  function(x) race_score_linear_test(some_reg = x, 
                                                     race_inter = "negro_na",
                                                     test_var = "grades_score"))
F_results <- round(c(testing[1,]),3)
P_results <- round(c(testing[2,]),3)


stargazer(list_regs_c, 
          column.separate = c(2,2,2,2),
          covariate.labels = cov_var_labels_bi,
          dep.var.labels.include = F,
          dep.var.caption = "Social status",
          column.labels = c("Number of white friends", "Number of nonwhite friends", "Skin Color of friends", "Share of white friends"),
          omit = variables_to_omit, 
          omit.stat = c("f", "rsq","ser"),
          add.lines = list(c("Linear Hypothesis:", "",  "",  "",  "",  "",  "",  ""),
                           c("F-statistic:",  F_results),
                           c("P-value", P_results),
                           c("Observation", n_obs),
                           c("Adjusted R2", r_adj)),
          font.size = "tiny", type = "text")

cov_var_labels_bi <- c("Nonwhite", "Grades", "Nonwhite*Grades")

stargazer(list_regs_c, 
          column.separate = c(2,2,2,2),
          covariate.labels = cov_var_labels_bi,
          dep.var.labels.include = F,
          dep.var.caption = "Social status",
          column.labels = c("Number of white friends", "Number of nonwhite friends", "Skin Color of friends", "Share of white friends"),
          omit = variables_to_omit, 
          omit.stat = c("f", "rsq","ser"),
          add.lines = list(c("Linear Hypothesis:", "",  "",  "",  "",  "",  "",  ""),
                           c("F-statistic:",  F_results),
                           c("P-value", P_results),
                           c("Observation", n_obs),
                           c("Adjusted R2", r_adj)),
          font.size = "footnotesize", type = "latex", 
          out = "tables/robustness_other_dep_vars.tex",
          label = "robustness_other_dep_vars",
          title = "Grades, Racial identification and their 
          relationship with other dependent variables")

